1   package org.apache.lucene.facet.taxonomy;
2   
3   import java.io.IOException;
4   import java.io.PrintWriter;
5   import java.io.StringWriter;
6   import java.util.ArrayList;
7   import java.util.Arrays;
8   import java.util.concurrent.atomic.AtomicBoolean;
9   
10  import org.apache.lucene.facet.FacetTestCase;
11  import org.apache.lucene.facet.SlowRAMDirectory;
12  import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
13  import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
14  import org.apache.lucene.store.Directory;
15  import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
16  import org.junit.Test;
17  
18  /*
19   * Licensed to the Apache Software Foundation (ASF) under one or more
20   * contributor license agreements.  See the NOTICE file distributed with
21   * this work for additional information regarding copyright ownership.
22   * The ASF licenses this file to You under the Apache License, Version 2.0
23   * (the "License"); you may not use this file except in compliance with
24   * the License.  You may obtain a copy of the License at
25   *
26   *     http://www.apache.org/licenses/LICENSE-2.0
27   *
28   * Unless required by applicable law or agreed to in writing, software
29   * distributed under the License is distributed on an "AS IS" BASIS,
30   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31   * See the License for the specific language governing permissions and
32   * limitations under the License.
33   */
34  
35  @SuppressCodecs("SimpleText")
36  public class TestTaxonomyCombined extends FacetTestCase {
37  
38    /**  The following categories will be added to the taxonomy by
39      fillTaxonomy(), and tested by all tests below:
40    */
41    private final static String[][] categories = {
42      { "Author", "Tom Clancy" },
43      { "Author", "Richard Dawkins" },
44      { "Author", "Richard Adams" },
45      { "Price", "10", "11" },
46      { "Price", "10", "12" },
47      { "Price", "20", "27" },
48      { "Date", "2006", "05" },
49      { "Date", "2005" },
50      { "Date", "2006" },
51      { "Subject", "Nonfiction", "Children", "Animals" },
52      { "Author", "Stephen Jay Gould" },
53      { "Author", "\u05e0\u05d3\u05d1\u3042\u0628" },
54    };
55    
56    /**  When adding the above categories with TaxonomyWriter.addCategory(), 
57      the following paths are expected to be returned:
58      (note that currently the full path is not returned, and therefore
59      not tested - rather, just the last component, the ordinal, is returned
60      and tested.
61    */
62    private final static int[][] expectedPaths = {
63      { 1, 2 },
64      { 1, 3 },
65      { 1, 4 },
66      { 5, 6, 7 },
67      { 5, 6, 8 },
68      { 5, 9, 10 },
69      { 11, 12, 13 },
70      { 11, 14 },
71      { 11, 12 },
72      { 15, 16, 17, 18 },
73      { 1, 19 },
74      { 1, 20 }
75    };
76  
77    /**  The taxonomy index is expected to then contain the following
78      generated categories, with increasing ordinals (note how parent
79      categories are be added automatically when subcategories are added).
80     */  
81    private final static String[][] expectedCategories = {
82      { }, // the root category
83      { "Author" },
84      { "Author", "Tom Clancy" },
85      { "Author", "Richard Dawkins" },
86      { "Author", "Richard Adams" },
87      { "Price" },
88      { "Price", "10" },
89      { "Price", "10", "11" },
90      { "Price", "10", "12" },
91      { "Price", "20" },
92      { "Price", "20", "27" },
93      { "Date" },
94      { "Date", "2006" },
95      { "Date", "2006", "05" },
96      { "Date", "2005" },
97      { "Subject" },
98      { "Subject", "Nonfiction" },
99      { "Subject", "Nonfiction", "Children" },
100     { "Subject", "Nonfiction", "Children", "Animals" },
101     { "Author", "Stephen Jay Gould" },
102     { "Author", "\u05e0\u05d3\u05d1\u3042\u0628" },
103   };
104 
105   /**  fillTaxonomy adds the categories in the categories[] array, and asserts
106     that the additions return exactly the ordinals (in the past - paths)
107     specified in expectedPaths[].
108     Note that this assumes that fillTaxonomy() is called on an empty taxonomy
109     index. Calling it after something else was already added to the taxonomy
110     index will surely have this method fail.
111    */
112   public static void fillTaxonomy(TaxonomyWriter tw) throws IOException {
113     for (int i = 0; i < categories.length; i++) {
114       int ordinal = tw.addCategory(new FacetLabel(categories[i]));
115       int expectedOrdinal = expectedPaths[i][expectedPaths[i].length-1];
116       if (ordinal!=expectedOrdinal) {
117         fail("For category "+showcat(categories[i])+" expected ordinal "+
118             expectedOrdinal+", but got "+ordinal);
119       }
120     }
121   }
122 
123   public static String showcat(String[] path) {
124     if (path==null) {
125       return "<null>";
126     }
127     if (path.length==0) {
128       return "<empty>";
129     }
130     if (path.length==1 && path[0].length()==0) {
131       return "<\"\">";
132     }
133     StringBuilder sb = new StringBuilder(path[0]);
134     for (int i=1; i<path.length; i++) {
135       sb.append('/');
136       sb.append(path[i]);
137     }
138     return sb.toString();
139   }
140 
141   private String showcat(FacetLabel path) {
142     if (path==null) {
143       return "<null>";
144     }
145     if (path.length==0) {
146       return "<empty>";
147     }
148     return "<"+path.toString()+">";
149   }
150 
151   /**  Basic tests for TaxonomyWriter. Basically, we test that
152     IndexWriter.addCategory works, i.e. returns the expected ordinals
153     (this is tested by calling the fillTaxonomy() method above).
154     We do not test here that after writing the index can be read -
155     this will be done in more tests below.
156    */
157   @Test
158   public void testWriter() throws Exception {
159     Directory indexDir = newDirectory();
160     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
161     fillTaxonomy(tw);
162     // Also check TaxonomyWriter.getSize() - see that the taxonomy's size
163     // is what we expect it to be.
164     assertEquals(expectedCategories.length, tw.getSize());
165     tw.close();
166     indexDir.close();
167   }
168 
169   /**  testWriterTwice is exactly like testWriter, except that after adding
170     all the categories, we add them again, and see that we get the same
171     old ids again - not new categories.
172    */
173   @Test
174   public void testWriterTwice() throws Exception {
175     Directory indexDir = newDirectory();
176     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
177     fillTaxonomy(tw);
178     // run fillTaxonomy again - this will try to add the same categories
179     // again, and check that we see the same ordinal paths again, not
180     // different ones. 
181     fillTaxonomy(tw);
182     // Let's check the number of categories again, to see that no
183     // extraneous categories were created:
184     assertEquals(expectedCategories.length, tw.getSize());    
185     tw.close();
186     indexDir.close();
187   }
188 
189   /**  testWriterTwice2 is similar to testWriterTwice, except that the index
190     is closed and reopened before attempting to write to it the same
191     categories again. While testWriterTwice can get along with writing
192     and reading correctly just to the cache, testWriterTwice2 checks also
193     the actual disk read part of the writer:
194    */
195   @Test
196   public void testWriterTwice2() throws Exception {
197     Directory indexDir = newDirectory();
198     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
199     fillTaxonomy(tw);
200     tw.close();
201     tw = new DirectoryTaxonomyWriter(indexDir);
202     // run fillTaxonomy again - this will try to add the same categories
203     // again, and check that we see the same ordinals again, not different
204     // ones, and that the number of categories hasn't grown by the new
205     // additions
206     fillTaxonomy(tw);
207     assertEquals(expectedCategories.length, tw.getSize());    
208     tw.close();
209     indexDir.close();
210   }
211   
212   /**
213    * testWriterTwice3 is yet another test which tests creating a taxonomy
214    * in two separate writing sessions. This test used to fail because of
215    * a bug involving commit(), explained below, and now should succeed.
216    */
217   @Test
218   public void testWriterTwice3() throws Exception {
219     Directory indexDir = newDirectory();
220     // First, create and fill the taxonomy
221     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
222     fillTaxonomy(tw);
223     tw.close();
224     // Now, open the same taxonomy and add the same categories again.
225     // After a few categories, the LuceneTaxonomyWriter implementation
226     // will stop looking for each category on disk, and rather read them
227     // all into memory and close its reader. The bug was that it closed
228     // the reader, but forgot that it did (because it didn't set the reader
229     // reference to null).
230     tw = new DirectoryTaxonomyWriter(indexDir);
231     fillTaxonomy(tw);
232     // Add one new category, just to make commit() do something:
233     tw.addCategory(new FacetLabel("hi"));
234     // Do a commit(). Here was a bug - if tw had a reader open, it should
235     // be reopened after the commit. However, in our case the reader should
236     // not be open (as explained above) but because it was not set to null,
237     // we forgot that, tried to reopen it, and got an AlreadyClosedException.
238     tw.commit();
239     assertEquals(expectedCategories.length+1, tw.getSize());    
240     tw.close();
241     indexDir.close();
242   }  
243   
244   /**  Another set of tests for the writer, which don't use an array and
245    *  try to distill the different cases, and therefore may be more helpful
246    *  for debugging a problem than testWriter() which is hard to know why
247    *  or where it failed. 
248    */
249   @Test
250   public void testWriterSimpler() throws Exception {
251     Directory indexDir = newDirectory();
252     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
253     assertEquals(1, tw.getSize()); // the root only
254     // Test that adding a new top-level category works
255     assertEquals(1, tw.addCategory(new FacetLabel("a")));
256     assertEquals(2, tw.getSize());
257     // Test that adding the same category again is noticed, and the
258     // same ordinal (and not a new one) is returned.
259     assertEquals(1, tw.addCategory(new FacetLabel("a")));
260     assertEquals(2, tw.getSize());
261     // Test that adding another top-level category returns a new ordinal,
262     // not the same one
263     assertEquals(2, tw.addCategory(new FacetLabel("b")));
264     assertEquals(3, tw.getSize());
265     // Test that adding a category inside one of the above adds just one
266     // new ordinal:
267     assertEquals(3, tw.addCategory(new FacetLabel("a","c")));
268     assertEquals(4, tw.getSize());
269     // Test that adding the same second-level category doesn't do anything:
270     assertEquals(3, tw.addCategory(new FacetLabel("a","c")));
271     assertEquals(4, tw.getSize());
272     // Test that adding a second-level category with two new components
273     // indeed adds two categories
274     assertEquals(5, tw.addCategory(new FacetLabel("d","e")));
275     assertEquals(6, tw.getSize());
276     // Verify that the parents were added above in the order we expected
277     assertEquals(4, tw.addCategory(new FacetLabel("d")));
278     // Similar, but inside a category that already exists:
279     assertEquals(7, tw.addCategory(new FacetLabel("b", "d","e")));
280     assertEquals(8, tw.getSize());
281     // And now inside two levels of categories that already exist:
282     assertEquals(8, tw.addCategory(new FacetLabel("b", "d","f")));
283     assertEquals(9, tw.getSize());
284     
285     tw.close();
286     indexDir.close();
287   }
288   
289   /**  Test writing an empty index, and seeing that a reader finds in it
290     the root category, and only it. We check all the methods on that
291     root category return the expected results.
292    */
293   @Test
294   public void testRootOnly() throws Exception {
295     Directory indexDir = newDirectory();
296     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
297     // right after opening the index, it should already contain the
298     // root, so have size 1:
299     assertEquals(1, tw.getSize());
300     tw.close();
301     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
302     assertEquals(1, tr.getSize());
303     assertEquals(0, tr.getPath(0).length);
304     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]);
305     assertEquals(0, tr.getOrdinal(new FacetLabel()));
306     tr.close();
307     indexDir.close();
308   }
309 
310   /**  The following test is exactly the same as testRootOnly, except we
311    *  do not close the writer before opening the reader. We want to see
312    *  that the root is visible to the reader not only after the writer is
313    *  closed, but immediately after it is created.
314    */
315   @Test
316   public void testRootOnly2() throws Exception {
317     Directory indexDir = newDirectory();
318     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
319     tw.commit();
320     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
321     assertEquals(1, tr.getSize());
322     assertEquals(0, tr.getPath(0).length);
323     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]);
324     assertEquals(0, tr.getOrdinal(new FacetLabel()));
325     tw.close();
326     tr.close();
327     indexDir.close();
328   }
329 
330   /**  Basic tests for TaxonomyReader's category &lt;=&gt; ordinal transformations
331     (getSize(), getCategory() and getOrdinal()).
332     We test that after writing the index, it can be read and all the
333     categories and ordinals are there just as we expected them to be.
334    */
335   @Test
336   public void testReaderBasic() throws Exception {
337     Directory indexDir = newDirectory();
338     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
339     fillTaxonomy(tw);
340     tw.close();
341     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
342 
343     // test TaxonomyReader.getSize():
344     assertEquals(expectedCategories.length, tr.getSize());
345 
346     // test round trips of ordinal => category => ordinal
347     for (int i=0; i<tr.getSize(); i++) {
348       assertEquals(i, tr.getOrdinal(tr.getPath(i)));
349     }
350 
351     // test TaxonomyReader.getCategory():
352     for (int i = 1; i < tr.getSize(); i++) {
353       FacetLabel expectedCategory = new FacetLabel(expectedCategories[i]);
354       FacetLabel category = tr.getPath(i);
355       if (!expectedCategory.equals(category)) {
356         fail("For ordinal "+i+" expected category "+
357             showcat(expectedCategory)+", but got "+showcat(category));
358       }
359     }
360     //  (also test invalid ordinals:)
361     assertNull(tr.getPath(-1));
362     assertNull(tr.getPath(tr.getSize()));
363     assertNull(tr.getPath(TaxonomyReader.INVALID_ORDINAL));
364 
365     // test TaxonomyReader.getOrdinal():
366     for (int i = 1; i < expectedCategories.length; i++) {
367       int expectedOrdinal = i;
368       int ordinal = tr.getOrdinal(new FacetLabel(expectedCategories[i]));
369       if (expectedOrdinal != ordinal) {
370         fail("For category "+showcat(expectedCategories[i])+" expected ordinal "+
371             expectedOrdinal+", but got "+ordinal);
372       }
373     }
374     // (also test invalid categories:)
375     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new FacetLabel("non-existant")));
376     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new FacetLabel("Author", "Jules Verne")));
377 
378     tr.close();
379     indexDir.close();
380   }
381 
382   /**  Tests for TaxonomyReader's getParent() method.
383     We check it by comparing its results to those we could have gotten by
384     looking at the category string paths (where the parentage is obvious).
385     Note that after testReaderBasic(), we already know we can trust the
386     ordinal &lt;=&gt; category conversions.
387     
388     Note: At the moment, the parent methods in the reader are deprecated,
389     but this does not mean they should not be tested! Until they are
390     removed (*if* they are removed), these tests should remain to see
391     that they still work correctly.
392    */
393 
394   @Test
395   public void testReaderParent() throws Exception {
396     Directory indexDir = newDirectory();
397     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
398     fillTaxonomy(tw);
399     tw.close();
400     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
401 
402     // check that the parent of the root ordinal is the invalid ordinal:
403     int[] parents = tr.getParallelTaxonomyArrays().parents();
404     assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[0]);
405 
406     // check parent of non-root ordinals:
407     for (int ordinal=1; ordinal<tr.getSize(); ordinal++) {
408       FacetLabel me = tr.getPath(ordinal);
409       int parentOrdinal = parents[ordinal];
410       FacetLabel parent = tr.getPath(parentOrdinal);
411       if (parent==null) {
412         fail("Parent of "+ordinal+" is "+parentOrdinal+
413         ", but this is not a valid category.");
414       }
415       // verify that the parent is indeed my parent, according to the strings
416       if (!me.subpath(me.length-1).equals(parent)) {
417         fail("Got parent "+parentOrdinal+" for ordinal "+ordinal+
418             " but categories are "+showcat(parent)+" and "+showcat(me)+
419             " respectively.");
420       }
421     }
422 
423     tr.close();
424     indexDir.close();
425   }
426   
427   /**
428    * Tests for TaxonomyWriter's getParent() method. We check it by comparing
429    * its results to those we could have gotten by looking at the category
430    * string paths using a TaxonomyReader (where the parentage is obvious).
431    * Note that after testReaderBasic(), we already know we can trust the
432    * ordinal &lt;=&gt; category conversions from TaxonomyReader.
433    *
434    * The difference between testWriterParent1 and testWriterParent2 is that
435    * the former closes the taxonomy writer before reopening it, while the
436    * latter does not.
437    * 
438    * This test code is virtually identical to that of testReaderParent().
439    */
440   @Test
441   public void testWriterParent1() throws Exception {
442     Directory indexDir = newDirectory();
443     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
444     fillTaxonomy(tw);
445     tw.close();
446     tw = new DirectoryTaxonomyWriter(indexDir);
447     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
448     
449     checkWriterParent(tr, tw);
450     
451     tw.close();
452     tr.close();
453     indexDir.close();
454   }
455 
456   @Test
457   public void testWriterParent2() throws Exception {
458     Directory indexDir = newDirectory();
459     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
460     fillTaxonomy(tw);
461     tw.commit();
462     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
463     
464     checkWriterParent(tr, tw);
465     
466     tw.close();
467     tr.close();
468     indexDir.close();
469   }
470   
471   private void checkWriterParent(TaxonomyReader tr, TaxonomyWriter tw) throws Exception {
472     // check that the parent of the root ordinal is the invalid ordinal:
473     assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(0));
474 
475     // check parent of non-root ordinals:
476     for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) {
477       FacetLabel me = tr.getPath(ordinal);
478       int parentOrdinal = tw.getParent(ordinal);
479       FacetLabel parent = tr.getPath(parentOrdinal);
480       if (parent == null) {
481         fail("Parent of " + ordinal + " is " + parentOrdinal
482             + ", but this is not a valid category.");
483       }
484       // verify that the parent is indeed my parent, according to the
485       // strings
486       if (!me.subpath(me.length - 1).equals(parent)) {
487         fail("Got parent " + parentOrdinal + " for ordinal " + ordinal
488             + " but categories are " + showcat(parent) + " and "
489             + showcat(me) + " respectively.");
490       }
491     }
492 
493     // check parent of of invalid ordinals:
494     try {
495       tw.getParent(-1);
496       fail("getParent for -1 should throw exception");
497     } catch (ArrayIndexOutOfBoundsException e) {
498       // ok
499     }
500     try {
501       tw.getParent(TaxonomyReader.INVALID_ORDINAL);
502       fail("getParent for INVALID_ORDINAL should throw exception");
503     } catch (ArrayIndexOutOfBoundsException e) {
504       // ok
505     }
506     try {
507       int parent = tw.getParent(tr.getSize());
508       fail("getParent for getSize() should throw exception, but returned "
509           + parent);
510     } catch (ArrayIndexOutOfBoundsException e) {
511       // ok
512     }
513   }
514 
515   /**
516    * Test TaxonomyReader's child browsing method, getChildrenArrays()
517    * This only tests for correctness of the data on one example - we have
518    * below further tests on data refresh etc.
519    */
520   @Test
521   public void testChildrenArrays() throws Exception {
522     Directory indexDir = newDirectory();
523     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
524     fillTaxonomy(tw);
525     tw.close();
526     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
527     ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
528     int[] youngestChildArray = ca.children();
529     assertEquals(tr.getSize(), youngestChildArray.length);
530     int[] olderSiblingArray = ca.siblings();
531     assertEquals(tr.getSize(), olderSiblingArray.length);
532     for (int i=0; i<expectedCategories.length; i++) {
533       // find expected children by looking at all expectedCategories
534       // for children
535       ArrayList<Integer> expectedChildren = new ArrayList<>();
536       for (int j=expectedCategories.length-1; j>=0; j--) {
537         if (expectedCategories[j].length != expectedCategories[i].length+1) {
538           continue; // not longer by 1, so can't be a child
539         }
540         boolean ischild=true;
541         for (int k=0; k<expectedCategories[i].length; k++) {
542           if (!expectedCategories[j][k].equals(expectedCategories[i][k])) {
543             ischild=false;
544             break;
545           }
546         }
547         if (ischild) {
548           expectedChildren.add(j);
549         }
550       }
551       // check that children and expectedChildren are the same, with the
552       // correct reverse (youngest to oldest) order:
553       if (expectedChildren.size()==0) {
554         assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]);
555       } else {
556         int child = youngestChildArray[i];
557         assertEquals(expectedChildren.get(0).intValue(),
558             child);
559         for (int j=1; j<expectedChildren.size(); j++) {
560           child = olderSiblingArray[child];
561           assertEquals(expectedChildren.get(j).intValue(),
562               child);
563           // if child is INVALID_ORDINAL we should stop, but
564           // assertEquals would fail in this case anyway.
565         }
566         // When we're done comparing, olderSiblingArray should now point
567         // to INVALID_ORDINAL, saying there are no more children. If it
568         // doesn't, we found too many children...
569         assertEquals(-1, olderSiblingArray[child]);
570       }
571     }
572     tr.close();
573     indexDir.close();
574   }
575 
576   /**
577    * Similar to testChildrenArrays, except rather than look at
578    * expected results, we test for several "invariants" that the results
579    * should uphold, e.g., that a child of a category indeed has this category
580    * as its parent. This sort of test can more easily be extended to larger
581    * example taxonomies, because we do not need to build the expected list
582    * of categories like we did in the above test.
583    */
584   @Test
585   public void testChildrenArraysInvariants() throws Exception {
586     Directory indexDir = newDirectory();
587     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
588     fillTaxonomy(tw);
589     tw.close();
590     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
591     ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
592     int[] children = ca.children();
593     assertEquals(tr.getSize(), children.length);
594     int[] olderSiblingArray = ca.siblings();
595     assertEquals(tr.getSize(), olderSiblingArray.length);
596         
597     // test that the "youngest child" of every category is indeed a child:
598     int[] parents = tr.getParallelTaxonomyArrays().parents();
599     for (int i=0; i<tr.getSize(); i++) {
600       int youngestChild = children[i];
601       if (youngestChild != TaxonomyReader.INVALID_ORDINAL) {
602         assertEquals(i, parents[youngestChild]);
603       }
604     }
605         
606     // test that the "older sibling" of every category is indeed older (lower)
607     // (it can also be INVALID_ORDINAL, which is lower than any ordinal)
608     for (int i=0; i<tr.getSize(); i++) {
609       assertTrue("olderSiblingArray["+i+"] should be <"+i, olderSiblingArray[i] < i);
610     }
611     
612     // test that the "older sibling" of every category is indeed a sibling
613     // (they share the same parent)
614     for (int i=0; i<tr.getSize(); i++) {
615       int sibling = olderSiblingArray[i];
616       if (sibling == TaxonomyReader.INVALID_ORDINAL) {
617         continue;
618       }
619       assertEquals(parents[i], parents[sibling]);
620     }
621     
622     // And now for slightly more complex (and less "invariant-like"...)
623     // tests:
624     
625     // test that the "youngest child" is indeed the youngest (so we don't
626     // miss the first children in the chain)
627     for (int i=0; i<tr.getSize(); i++) {
628       // Find the really youngest child:
629       int j;
630       for (j=tr.getSize()-1; j>i; j--) {
631         if (parents[j]==i) {
632           break; // found youngest child
633         }
634       }
635       if (j==i) { // no child found
636         j=TaxonomyReader.INVALID_ORDINAL;
637       }
638       assertEquals(j, children[i]);
639     }
640 
641     // test that the "older sibling" is indeed the least oldest one - and
642     // not a too old one or -1 (so we didn't miss some children in the
643     // middle or the end of the chain).
644     for (int i=0; i<tr.getSize(); i++) {
645       // Find the youngest older sibling:
646       int j;
647       for (j=i-1; j>=0; j--) {
648         if (parents[j]==parents[i]) {
649           break; // found youngest older sibling
650         }
651       }
652       if (j<0) { // no sibling found
653         j=TaxonomyReader.INVALID_ORDINAL;
654       }
655       assertEquals(j, olderSiblingArray[i]);
656     }
657   
658     tr.close();
659     indexDir.close();
660   }
661   
662   /**
663    * Test how getChildrenArrays() deals with the taxonomy's growth:
664    */
665   @Test
666   public void testChildrenArraysGrowth() throws Exception {
667     Directory indexDir = newDirectory();
668     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
669     tw.addCategory(new FacetLabel("hi", "there"));
670     tw.commit();
671     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
672     ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
673     assertEquals(3, tr.getSize());
674     assertEquals(3, ca.siblings().length);
675     assertEquals(3, ca.children().length);
676     assertTrue(Arrays.equals(new int[] { 1, 2, -1 }, ca.children()));
677     assertTrue(Arrays.equals(new int[] { -1, -1, -1 }, ca.siblings()));
678     tw.addCategory(new FacetLabel("hi", "ho"));
679     tw.addCategory(new FacetLabel("hello"));
680     tw.commit();
681     // Before refresh, nothing changed..
682     ParallelTaxonomyArrays newca = tr.getParallelTaxonomyArrays();
683     assertSame(newca, ca); // we got exactly the same object
684     assertEquals(3, tr.getSize());
685     assertEquals(3, ca.siblings().length);
686     assertEquals(3, ca.children().length);
687     // After the refresh, things change:
688     TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
689     assertNotNull(newtr);
690     tr.close();
691     tr = newtr;
692     ca = tr.getParallelTaxonomyArrays();
693     assertEquals(5, tr.getSize());
694     assertEquals(5, ca.siblings().length);
695     assertEquals(5, ca.children().length);
696     assertTrue(Arrays.equals(new int[] { 4, 3, -1, -1, -1 }, ca.children()));
697     assertTrue(Arrays.equals(new int[] { -1, -1, -1, 2, 1 }, ca.siblings()));
698     tw.close();
699     tr.close();
700     indexDir.close();
701   }
702   
703   // Test that getParentArrays is valid when retrieved during refresh
704   @Test
705   public void testTaxonomyReaderRefreshRaces() throws Exception {
706     // compute base child arrays - after first chunk, and after the other
707     Directory indexDirBase = newDirectory();
708     TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase);
709     twBase.addCategory(new FacetLabel("a", "0"));
710     final FacetLabel abPath = new FacetLabel("a", "b");
711     twBase.addCategory(abPath);
712     twBase.commit();
713     TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase);
714 
715     final ParallelTaxonomyArrays ca1 = trBase.getParallelTaxonomyArrays();
716     
717     final int abOrd = trBase.getOrdinal(abPath);
718     final int abYoungChildBase1 = ca1.children()[abOrd]; 
719     
720     final int numCategories = atLeast(800);
721     for (int i = 0; i < numCategories; i++) {
722       twBase.addCategory(new FacetLabel("a", "b", Integer.toString(i)));
723     }
724     twBase.close();
725     
726     TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(trBase);
727     assertNotNull(newTaxoReader);
728     trBase.close();
729     trBase = newTaxoReader;
730     
731     final ParallelTaxonomyArrays ca2 = trBase.getParallelTaxonomyArrays();
732     final int abYoungChildBase2 = ca2.children()[abOrd];
733     
734     int numRetries = atLeast(50);
735     for (int retry = 0; retry < numRetries; retry++) {
736       assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, numCategories);
737     }
738     
739     trBase.close();
740     indexDirBase.close();
741   }
742 
743   private void assertConsistentYoungestChild(final FacetLabel abPath,
744       final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry, int numCategories)
745       throws Exception {
746     SlowRAMDirectory indexDir = new SlowRAMDirectory(-1, null); // no slowness for initialization
747     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
748     tw.addCategory(new FacetLabel("a", "0"));
749     tw.addCategory(abPath);
750     tw.commit();
751     
752     final DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
753     for (int i = 0; i < numCategories; i++) {
754       final FacetLabel cp = new FacetLabel("a", "b", Integer.toString(i));
755       tw.addCategory(cp);
756       assertEquals("Ordinal of "+cp+" must be invalid until Taxonomy Reader was refreshed", TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(cp));
757     }
758     tw.close();
759     
760     final AtomicBoolean stop = new AtomicBoolean(false);
761     final Throwable[] error = new Throwable[] { null };
762     final int retrieval[] = { 0 }; 
763     
764     Thread thread = new Thread("Child Arrays Verifier") {
765       @Override
766       public void run() {
767         setPriority(1 + getPriority());
768         try {
769           while (!stop.get()) {
770             int lastOrd = tr.getParallelTaxonomyArrays().parents().length - 1;
771             assertNotNull("path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd));
772             assertChildrenArrays(tr.getParallelTaxonomyArrays(), retry, retrieval[0]++);
773             sleep(10); // don't starve refresh()'s CPU, which sleeps every 50 bytes for 1 ms
774           }
775         } catch (Throwable e) {
776           error[0] = e;
777           stop.set(true);
778         }
779       }
780 
781       private void assertChildrenArrays(ParallelTaxonomyArrays ca, int retry, int retrieval) {
782         final int abYoungChild = ca.children()[abOrd];
783         assertTrue(
784             "Retry "+retry+": retrieval: "+retrieval+": wrong youngest child for category "+abPath+" (ord="+abOrd+
785             ") - must be either "+abYoungChildBase1+" or "+abYoungChildBase2+" but was: "+abYoungChild,
786             abYoungChildBase1==abYoungChild ||
787             abYoungChildBase2==ca.children()[abOrd]);
788       }
789     };
790     thread.start();
791     
792     indexDir.setSleepMillis(1); // some delay for refresh
793     TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
794     if (newTaxoReader != null) {
795       newTaxoReader.close();
796     }
797     
798     stop.set(true);
799     thread.join();
800     assertNull("Unexpcted exception at retry "+retry+" retrieval "+retrieval[0]+": \n"+stackTraceStr(error[0]), error[0]);
801     
802     tr.close();
803   }
804 
805   /** Grab the stack trace into a string since the exception was thrown in a thread and we want the assert 
806    * outside the thread to show the stack trace in case of failure.   */
807   private String stackTraceStr(final Throwable error) {
808     if (error == null) {
809       return "";
810     }
811     StringWriter sw = new StringWriter();
812     PrintWriter pw = new PrintWriter(sw);
813     error.printStackTrace(pw);
814     pw.close();
815     return sw.toString();
816   }
817   
818   /**  Test that if separate reader and writer objects are opened, new
819     categories written into the writer are available to a reader only
820     after a commit().
821     Note that this test obviously doesn't cover all the different
822     concurrency scenarios, all different methods, and so on. We may
823     want to write more tests of this sort.
824 
825     This test simulates what would happen when there are two separate
826     processes, one doing indexing, and the other searching, and each opens
827     its own object (with obviously no connection between the objects) using
828     the same disk files. Note, though, that this test does not test what
829     happens when the two processes do their actual work at exactly the same
830     time.
831     It also doesn't test multi-threading.
832    */
833   @Test
834   public void testSeparateReaderAndWriter() throws Exception {
835     Directory indexDir = newDirectory();
836     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
837     tw.commit();
838     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
839 
840     assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
841     tw.addCategory(new FacetLabel("Author"));
842     assertEquals(1, tr.getSize()); // still root only...
843     assertNull(TaxonomyReader.openIfChanged(tr)); // this is not enough, because tw.commit() hasn't been done yet
844     assertEquals(1, tr.getSize()); // still root only...
845     tw.commit();
846     assertEquals(1, tr.getSize()); // still root only...
847     TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
848     assertNotNull(newTaxoReader);
849     tr.close();
850     tr = newTaxoReader;
851     
852     int author = 1;
853     try {
854       assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParallelTaxonomyArrays().parents()[author]);
855       // ok
856     } catch (ArrayIndexOutOfBoundsException e) {
857       fail("After category addition, commit() and refresh(), getParent for "+author+" should NOT throw exception");
858     }
859     assertEquals(2, tr.getSize()); // finally, see there are two categories
860 
861     // now, add another category, and verify that after commit and refresh
862     // the parent of this category is correct (this requires the reader
863     // to correctly update its prefetched parent vector), and that the
864     // old information also wasn't ruined:
865     tw.addCategory(new FacetLabel("Author", "Richard Dawkins"));
866     int dawkins = 2;
867     tw.commit();
868     newTaxoReader = TaxonomyReader.openIfChanged(tr);
869     assertNotNull(newTaxoReader);
870     tr.close();
871     tr = newTaxoReader;
872     int[] parents = tr.getParallelTaxonomyArrays().parents();
873     assertEquals(author, parents[dawkins]);
874     assertEquals(TaxonomyReader.ROOT_ORDINAL, parents[author]);
875     assertEquals(TaxonomyReader.INVALID_ORDINAL, parents[TaxonomyReader.ROOT_ORDINAL]);
876     assertEquals(3, tr.getSize()); 
877     tw.close();
878     tr.close();
879     indexDir.close();
880   }
881   
882   @Test
883   public void testSeparateReaderAndWriter2() throws Exception {
884     Directory indexDir = newDirectory();
885     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
886     tw.commit();
887     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
888 
889     // Test getOrdinal():
890     FacetLabel author = new FacetLabel("Author");
891 
892     assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
893     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
894     tw.addCategory(author);
895     // before commit and refresh, no change:
896     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
897     assertEquals(1, tr.getSize()); // still root only...
898     assertNull(TaxonomyReader.openIfChanged(tr)); // this is not enough, because tw.commit() hasn't been done yet
899     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
900     assertEquals(1, tr.getSize()); // still root only...
901     tw.commit();
902     // still not enough before refresh:
903     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
904     assertEquals(1, tr.getSize()); // still root only...
905     TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
906     assertNotNull(newTaxoReader);
907     tr.close();
908     tr = newTaxoReader;
909     assertEquals(1, tr.getOrdinal(author));
910     assertEquals(2, tr.getSize());
911     tw.close();
912     tr.close();
913     indexDir.close();
914   }
915   
916   /**
917    * fillTaxonomyCheckPaths adds the categories in the categories[] array,
918    * and asserts that the additions return exactly paths specified in
919    * expectedPaths[]. This is the same add fillTaxonomy() but also checks
920    * the correctness of getParent(), not just addCategory().
921    * Note that this assumes that fillTaxonomyCheckPaths() is called on an empty
922    * taxonomy index. Calling it after something else was already added to the
923    * taxonomy index will surely have this method fail.
924    */
925   public static void fillTaxonomyCheckPaths(TaxonomyWriter tw) throws IOException {
926     for (int i = 0; i < categories.length; i++) {
927       int ordinal = tw.addCategory(new FacetLabel(categories[i]));
928       int expectedOrdinal = expectedPaths[i][expectedPaths[i].length-1];
929       if (ordinal!=expectedOrdinal) {
930         fail("For category "+showcat(categories[i])+" expected ordinal "+
931             expectedOrdinal+", but got "+ordinal);
932       }
933       for (int j=expectedPaths[i].length-2; j>=0; j--) {
934         ordinal = tw.getParent(ordinal);
935         expectedOrdinal = expectedPaths[i][j];
936         if (ordinal!=expectedOrdinal) {
937           fail("For category "+showcat(categories[i])+" expected ancestor level "+
938               (expectedPaths[i].length-1-j)+" was "+expectedOrdinal+
939               ", but got "+ordinal);
940         }
941       }    
942     }
943   }
944   
945   // After fillTaxonomy returned successfully, checkPaths() checks that
946   // the getParent() calls return as expected, from the table
947   public static void checkPaths(TaxonomyWriter tw) throws IOException {
948     for (int i = 0; i < categories.length; i++) {
949       int ordinal = expectedPaths[i][expectedPaths[i].length-1];
950       for (int j=expectedPaths[i].length-2; j>=0; j--) {
951         ordinal = tw.getParent(ordinal);
952         int expectedOrdinal = expectedPaths[i][j];
953         if (ordinal!=expectedOrdinal) {
954           fail("For category "+showcat(categories[i])+" expected ancestor level "+
955               (expectedPaths[i].length-1-j)+" was "+expectedOrdinal+
956               ", but got "+ordinal);
957         }
958       }
959       assertEquals(TaxonomyReader.ROOT_ORDINAL, tw.getParent(expectedPaths[i][0]));
960     }
961     assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(TaxonomyReader.ROOT_ORDINAL));
962   }
963   
964   /**
965    * Basic test for TaxonomyWriter.getParent(). This is similar to testWriter
966    * above, except we also check the parents of the added categories, not just
967    * the categories themselves.
968    */
969   @Test
970   public void testWriterCheckPaths() throws Exception {
971     Directory indexDir = newDirectory();
972     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
973     fillTaxonomyCheckPaths(tw);
974     // Also check TaxonomyWriter.getSize() - see that the taxonomy's size
975     // is what we expect it to be.
976     assertEquals(expectedCategories.length, tw.getSize());
977     tw.close();
978     indexDir.close();
979   }
980   
981   /**
982    * testWriterCheckPaths2 is the path-checking variant of testWriterTwice
983    * and testWriterTwice2. After adding all the categories, we add them again,
984    * and see that we get the same old ids and paths. We repeat the path checking
985    * yet again after closing and opening the index for writing again - to see
986    * that the reading of existing data from disk works as well.
987    */
988   @Test
989   public void testWriterCheckPaths2() throws Exception {
990     Directory indexDir = newDirectory();
991     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
992     fillTaxonomy(tw);
993     checkPaths(tw);
994     fillTaxonomy(tw);
995     checkPaths(tw);
996     tw.close();
997 
998     tw = new DirectoryTaxonomyWriter(indexDir);
999     checkPaths(tw);
1000     fillTaxonomy(tw);
1001     checkPaths(tw);
1002     tw.close();
1003     indexDir.close();
1004   }
1005 
1006   @Test
1007   public void testNRT() throws Exception {
1008     Directory dir = newDirectory();
1009     DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
1010     TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
1011     
1012     FacetLabel cp = new FacetLabel("a");
1013     writer.addCategory(cp);
1014     TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader);
1015     assertNotNull("expected a new instance", newReader);
1016     assertEquals(2, newReader.getSize());
1017     assertNotSame(TaxonomyReader.INVALID_ORDINAL, newReader.getOrdinal(cp));
1018     reader.close();
1019     reader = newReader;
1020     
1021     writer.close();
1022     reader.close();
1023     
1024     dir.close();
1025   }
1026 
1027 //  TODO (Facet): test multiple readers, one writer. Have the multiple readers
1028 //  using the same object (simulating threads) or different objects
1029 //  (simulating processes).
1030 }